library(tidyverse)
library(haven)
library(dplyr)
library(knitr)
library(survey)
library(xtable)

### Fix the random seed and load the cleaned data file ----

# Seed the random number generator so any later random step is reproducible.
set.seed(12345678)

# The path is relative, so the script must be run from the project root.
df1 <- read_csv(file = "b_data/6_wide_data_clean.csv")

### Recoding socio-demographic characteristics ----

# Turn numeric survey codes into readable labels. case_when() yields NA for
# any code that is not listed below (and for missing values).
# `gender`, `educ` and `employ` are overwritten in place; `income`,
# `religion` and `age` are computed from `faminc_new`, `religpew` and
# `birthyr` respectively.
df1 <- df1 %>%
  mutate(
    gender = case_when(
      gender == 1 ~ "Male",
      gender == 2 ~ "Female"
    ),
    educ = case_when(
      educ == 1 ~ "No HS",
      educ == 2 ~ "HS graduate",
      educ == 3 ~ "Some college",
      educ == 4 ~ "2-year college",
      educ == 5 ~ "4-year college",
      educ == 6 ~ "Post-grad"
    ),
    employ = case_when(
      employ == 1 ~ "Full-time",
      employ == 2 ~ "Part-time",
      employ == 3 ~ "Temporarily laid off",
      employ == 4 ~ "Unemployed",
      employ == 5 ~ "Retired",
      employ == 6 ~ "Permanently disabled",
      employ == 7 ~ "Homemaker",
      employ == 8 ~ "Student",
      employ == 9 ~ "Other"
    ),
    # Sixteen income codes collapsed into five brackets.
    income = case_when(
      faminc_new %in% 1:2 ~ "Less than 19,999",
      faminc_new %in% 3:4 ~ "Between 20,000 and 39,999",
      faminc_new %in% 5:6 ~ "Between 40,000 and 59,999",
      faminc_new %in% 7:9 ~ "Between 60,000 and 99,999",
      faminc_new %in% 10:16 ~ "More than 100,000"
    ),
    # Codes 3-8 and 12 are pooled into a single "Other" category.
    religion = case_when(
      religpew == 1 ~ "Protestant",
      religpew == 2 ~ "Roman Catholic",
      religpew %in% c(3:8, 12) ~ "Other",
      religpew == 9 ~ "Atheist",
      religpew == 10 ~ "Agnostic",
      religpew == 11 ~ "Nothing in particular"
    ),
    # Age is measured relative to the year 2020.
    age = 2020 - birthyr
  )


### Recoding political characteristics ----

# Label the political variables. Codes without a branch below (and missing
# values) become NA.
df1 <- df1 %>%
  mutate(
    # Seven-point party id folded into three groups; code 4 is the midpoint.
    party = case_when(
      pid7 %in% 1:3 ~ "Democrat",
      pid7 == 4 ~ "Independent",
      pid7 %in% 5:7 ~ "Republican"
    ),
    vote16 = case_when(
      presvote16post == 1 ~ "Clinton",
      presvote16post == 2 ~ "Trump",
      presvote16post %in% 3:6 ~ "Other",
      presvote16post == 7 ~ "Did not vote for president"
    ),
    # Ordered factor running from "Very conservative" to "Very liberal",
    # with "Not sure" placed as the last level.
    ideology = factor(
      case_when(
        ideo5 == 1 ~ "Very liberal",
        ideo5 == 2 ~ "Liberal",
        ideo5 == 3 ~ "Moderate",
        ideo5 == 4 ~ "Conservative",
        ideo5 == 5 ~ "Very conservative",
        ideo5 == 6 ~ "Not sure"
      ),
      levels = c(
        "Very conservative", "Conservative", "Moderate",
        "Liberal", "Very liberal", "Not sure"
      ),
      ordered = TRUE
    ),
    registered_vote = case_when(
      votereg == 1 ~ "Yes",
      votereg == 2 ~ "No",
      votereg == 3 ~ "Don't know"
    )
  )


### descriptive analysis ----
### split the sample by the value of `race`
# Rows whose `race` is missing match neither condition and are dropped.
df_black <- dplyr::filter(df1, race == "Black respondent")
df_white <- dplyr::filter(df1, race == "White respondent")

### survey designs: no clustering (ids = ~1), weighted by the `weight` column
svydesign_black <- svydesign(ids = ~1, weights = ~weight, data = df_black)
svydesign_white <- svydesign(ids = ~1, weights = ~weight, data = df_white)

### variables for checking

vars <- c(
  "gender", "educ", "employ", "income", "religion", "ideology", "party",
  "registered_vote", "vote16", "amerimport", "raceimport", "prototypical"
)

### weighted proportion tables ----

# Build one weighted one-way proportion table per variable.
#   design     a survey design object
#   variables  character vector of column names in the design's data
# Returns an unnamed list of tables, in the same order as `variables`.
weighted_prop_tables <- function(design, variables) {
  lapply(variables, function(v) {
    # reformulate("x") builds the one-sided formula ~x
    prop.table(svytable(reformulate(v), design))
  })
}

### generate proportion tables for black respondents ----
prop_tables_black <- weighted_prop_tables(svydesign_black, vars)

### generate proportion tables for white respondents ----
prop_tables_white <- weighted_prop_tables(svydesign_white, vars)

### check for age
# Median respondent age in each group. na.rm = TRUE keeps a single missing
# age from turning the reported median into NA.
# NOTE(review): these are plain sample medians that ignore `weight`, while
# the proportion tables are weighted -- confirm this is intended.
median_age_black <- median(df_black$age, na.rm = TRUE)

median_age_white <- median(df_white$age, na.rm = TRUE)

### write the proportion tables and median ages to a LaTeX file ----

output_file <- "d_tables/prop_tables.tex"

# Write one proportion table, preceded by an unnumbered subsection heading.
#   prop_table  one-way table of proportions for a single variable
#   var_name    name of the variable the table describes
#   group       "Black" or "White"; used in the heading, caption and label
#   con         open text connection to write to
# Returns NULL invisibly; called for its side effect of writing to `con`.
write_prop_table <- function(prop_table, var_name, group, con) {
  # Underscores are special characters in LaTeX text, so a name such as
  # "registered_vote" must be escaped before it goes into a heading or
  # caption. The \label keeps the raw name, where underscores are fine.
  display_name <- gsub("_", "\\_", var_name, fixed = TRUE)
  heading <- paste0(display_name, " (", group, " Respondents)")

  table_latex <- print(
    xtable(
      prop_table,
      caption = paste("Proportion Table for", heading),
      # paste0() avoids the stray spaces paste() would insert,
      # giving e.g. "tab:gender_black" rather than "tab: gender _black".
      label = paste0("tab:", var_name, "_", tolower(group))
    ),
    type = "latex",
    include.rownames = TRUE,
    print.results = FALSE
  )

  writeLines(paste0("\\subsection*{", heading, "}"), con)
  writeLines(table_latex, con)
  writeLines("\n\n", con) # Add some spacing between tables
  invisible(NULL)
}

fileConn <- file(output_file, open = "wt")

# `finally` closes the connection even if rendering a table fails part-way.
tryCatch(
  {
    for (i in seq_along(vars)) {
      write_prop_table(prop_tables_black[[i]], vars[i], "Black", fileConn)
      write_prop_table(prop_tables_white[[i]], vars[i], "White", fileConn)
    }

    # Write median age for black respondents
    writeLines("\\section*{Median Age for Black Respondents}\n", fileConn)
    writeLines(
      paste0("The median age for black respondents is ", median_age_black, "."),
      fileConn
    )

    # Write median age for white respondents
    writeLines("\\section*{Median Age for White Respondents}\n", fileConn)
    writeLines(
      paste0("The median age for white respondents is ", median_age_white, "."),
      fileConn
    )
  },
  finally = close(fileConn)
)

#### Clear environment ----

# Remove every object visible to ls() in the environment this script runs in
# (names starting with a dot are not listed by default and therefore survive).
# NOTE(review): this also deletes objects that were created before the script
# was started, e.g. when it is source()d into an interactive session --
# confirm that wiping the caller's workspace is intended.
rm(list = ls())
